# ============================== SETUP ===============================
rm(list = ls())
set.seed(1)
options(scipen = 999)
setwd("~/Dropbox/Wayne-Ying/White_Nationalist_Recruitment/replication/codes")
library(tidyverse)
library(data.table)
library(dplyr)
library(vars)

# ========================= DATA WRANGLING ==========================
# ---- tweets ----
tweets <- fread("../datasets/input/tweets.csv", stringsAsFactors = FALSE)
tweets <- tweets[tweets$RelevantLogit==1,]
tweets <- tweets[tweets$ads_dict==0,]
# robustness check: only keep followers following 7 or more leaders
tweets <- tweets[tweets$seedfollow >= 7, ]

tweets$race_dict        <- ifelse(tweets$race_dict >=2, 1, 0)
tweets$gender_dict      <- ifelse(tweets$gender_dict >=2, 1, 0)
tweets$nationalism_dict <- ifelse(tweets$nationalism_dict >=2, 1, 0)
tweets$partisan_dict    <- ifelse(tweets$partisan_dict >=2, 1, 0)
tweets$religion_dict    <- ifelse(tweets$religion_dict >=2, 1, 0)

colnames(tweets)
tweets$date <- as.Date(tweets$created_at)

# individual / organization / follower by theme (daily means)
tweets$group <- ifelse(tweets$individual==1, "individual", 
                       ifelse(tweets$organization==1, "organization", "follower"))
tweets_iof <- tweets[,c("date","group","gender_dict","nationalism_dict","partisan_dict","race_dict","religion_dict")] %>% 
  group_by(date, group) %>%
  summarize(Religion = mean(religion_dict, na.rm = TRUE),
            Race = mean(race_dict, na.rm = TRUE),
            Nationalism = mean(nationalism_dict, na.rm = TRUE),
            Partisan = mean(partisan_dict, na.rm = TRUE),
            Gender = mean(gender_dict, na.rm = TRUE)) %>%
  pivot_longer(cols = c(Religion, Race, Nationalism, Partisan, Gender), names_to = "theme") %>%
  pivot_wider(names_from = group, values_from = value)

# leader / follower by theme (daily means)
tweets$group_lf <- ifelse(tweets$seedfollow == 999, "leader", "follower")
tweets_lf <- tweets[,c("date","group_lf","gender_dict","nationalism_dict","partisan_dict","race_dict","religion_dict")] %>% 
  group_by(date, group_lf) %>%
  summarize(Religion = mean(religion_dict, na.rm = TRUE),
            Race = mean(race_dict, na.rm = TRUE),
            Nationalism = mean(nationalism_dict, na.rm = TRUE),
            Partisan = mean(partisan_dict, na.rm = TRUE),
            Gender = mean(gender_dict, na.rm = TRUE)) %>%
  pivot_longer(cols = c(Religion, Race, Nationalism, Partisan, Gender), names_to = "theme") %>%
  pivot_wider(names_from = group_lf, values_from = value)

# merge to add leader column; keep follower/individual/organization from iof
tweets <- merge(tweets_iof, tweets_lf[,c("date","theme","leader")], by = c("date","theme"))
tweets <- tweets[order(tweets$date),]
tweets$platform <- 1

# ---- gabs ----
gabs <- fread("../datasets/input/gabs.csv", stringsAsFactors = FALSE)
gabs <- gabs[gabs$RelevantLogit==1,]
gabs <- gabs[gabs$ads_dict==0,]
# robustness check: only keep followers following 7 or more leaders
gabs <- gabs[gabs$seedfollow >= 7, ]

gabs$race_dict        <- ifelse(gabs$race_dict >=2, 1, 0)
gabs$gender_dict      <- ifelse(gabs$gender_dict >=2, 1, 0)
gabs$nationalism_dict <- ifelse(gabs$nationalism_dict >=2, 1, 0)
gabs$partisan_dict    <- ifelse(gabs$partisan_dict >=2, 1, 0)
gabs$religion_dict    <- ifelse(gabs$religion_dict >=2, 1, 0)

colnames(gabs)
gabs$date <- as.Date(gabs$created_at)

# individual / organization / follower by theme (daily means)
gabs$group <- ifelse(gabs$individual==1, "individual", 
                     ifelse(gabs$organization==1, "organization", "follower"))
gabs_iof <- gabs[,c("date","group","gender_dict","nationalism_dict","partisan_dict","race_dict","religion_dict")] %>% 
  group_by(date, group) %>%
  summarize(Religion = mean(religion_dict, na.rm = TRUE),
            Race = mean(race_dict, na.rm = TRUE),
            Nationalism = mean(nationalism_dict, na.rm = TRUE),
            Partisan = mean(partisan_dict, na.rm = TRUE),
            Gender = mean(gender_dict, na.rm = TRUE)) %>%
  pivot_longer(cols = c(Religion, Race, Nationalism, Partisan, Gender), names_to = "theme") %>%
  pivot_wider(names_from = group, values_from = value)

# leader / follower by theme (daily means)
gabs$group_lf <- ifelse(gabs$seedfollow == 999, "leader", "follower")
gabs_lf <- gabs[,c("date","group_lf","gender_dict","nationalism_dict","partisan_dict","race_dict","religion_dict")] %>% 
  group_by(date, group_lf) %>%
  summarize(Religion = mean(religion_dict, na.rm = TRUE),
            Race = mean(race_dict, na.rm = TRUE),
            Nationalism = mean(nationalism_dict, na.rm = TRUE),
            Partisan = mean(partisan_dict, na.rm = TRUE),
            Gender = mean(gender_dict, na.rm = TRUE)) %>%
  pivot_longer(cols = c(Religion, Race, Nationalism, Partisan, Gender), names_to = "theme") %>%
  pivot_wider(names_from = group_lf, values_from = value)

# merge to add leader column; keep follower/individual/organization from iof
gabs <- merge(gabs_iof, gabs_lf[,c("date","theme","leader")], by = c("date","theme"))
gabs <- gabs[order(gabs$date),]
gabs$platform <- 0

# ---- combine across platforms ----
comb <- rbind(gabs, tweets)
comb <- comb[order(comb$date),]
comb <- comb[comb$date>=as.Date("2016-08-15") & comb$date<=as.Date("2021-05-15"),]
comb <- na.omit(comb)
rm(list = c("gabs","tweets","gabs_iof","gabs_lf","tweets_iof","tweets_lf"))

# ============================ VAR MODELS ===========================
# Overall (followers vs leaders)
variables  <- c("follower","leader","platform","theme")
mformula   <- formula(paste0("~", paste0(variables, collapse = " + ")))
model_data <- model.matrix(mformula, comb[, variables])
model_data <- model_data[, 2:ncol(model_data)]

X_endogenous <- model_data[, c("follower","leader")]
X_exogenous  <- model_data[, c("platform","themeNationalism","themePartisan","themeRace","themeReligion")]
p <- 2
var_model_merged    <- VAR(y = X_endogenous, p = p, exogen = X_exogenous)
var_irfs_cum_merged <- irf(var_model_merged, n.ahead = 20, cumulative = TRUE)

# Organization (followers vs organizational leaders)
variables  <- c("follower","organization","platform","theme")
mformula   <- formula(paste0("~", paste0(variables, collapse = " + ")))
model_data <- model.matrix(mformula, comb[, variables])
model_data <- model_data[, 2:ncol(model_data)]

X_endogenous <- model_data[, c("follower","organization")]
X_exogenous  <- model_data[, c("platform","themeNationalism","themePartisan","themeRace","themeReligion")]
var_model_merged_org    <- VAR(y = X_endogenous, p = p, exogen = X_exogenous)
var_irfs_cum_merged_org <- irf(var_model_merged_org, n.ahead = 20, cumulative = TRUE)

# Individual (followers vs individual leaders)
variables  <- c("follower","individual","platform","theme")
mformula   <- formula(paste0("~", paste0(variables, collapse = " + ")))
model_data <- model.matrix(mformula, comb[, variables])
model_data <- model_data[, 2:ncol(model_data)]

X_endogenous <- model_data[, c("follower","individual")]
X_exogenous  <- model_data[, c("platform","themeNationalism","themePartisan","themeRace","themeReligion")]
var_model_merged_indiv    <- VAR(y = X_endogenous, p = p, exogen = X_exogenous)
var_irfs_cum_merged_indiv <- irf(var_model_merged_indiv, n.ahead = 20, cumulative = TRUE)

# ======================= EXTRACT IRF RESULTS =======================
# ---- pooled ----
var_irfs <- var_irfs_cum_merged
variables <- names(var_irfs$irf)
elements_to_pull <- c("irf","Upper","Lower")

irf_data <- NULL
for (el in elements_to_pull) {
  new_irf_info <- var_irfs[el][[1]]
  for (out in variables) {
    new_irf_var_data <- as.data.frame(new_irf_info[out][[1]])
    new_irf_var_data_long <- new_irf_var_data %>% gather(cov, value)
    new_irf_var_data_long$out  <- out
    new_irf_var_data_long$day  <- rep(1:nrow(new_irf_var_data), length(unique(new_irf_var_data_long$cov)))
    new_irf_var_data_long$e_type <- el
    irf_data <- rbind(irf_data, new_irf_var_data_long)
  }
}
irf_data$e_type <- recode(irf_data$e_type, `irf`="pe", `Lower`="lwr", `Upper`="upr")
colnames(irf_data)[colnames(irf_data)=="value"] <- "pooled"
irf_data <- irf_data %>% filter(cov != out)
irf_data_wide <- irf_data %>% mutate(pooled = (pooled/10)*100) %>% spread(e_type, pooled)
irf_data_wide$cov <- recode(irf_data_wide$cov, "follower"="Followers", "leader"="Leaders")
irf_data_wide$cov <- factor(irf_data_wide$cov, levels = c("Followers","Leaders"))
irf_data_wide$out <- recode(irf_data_wide$out, "follower"="Followers", "leader"="Leaders")
irf_data_wide$out <- factor(irf_data_wide$out, levels = c("Followers","Leaders"))

# ---- organization ----
var_irfs <- var_irfs_cum_merged_org
variables <- names(var_irfs$irf)
irf_data_org <- NULL
for (el in elements_to_pull) {
  new_irf_info <- var_irfs[el][[1]]
  for (out in variables) {
    new_irf_var_data <- as.data.frame(new_irf_info[out][[1]])
    new_irf_var_data_long <- new_irf_var_data %>% gather(cov, value)
    new_irf_var_data_long$out  <- out
    new_irf_var_data_long$day  <- rep(1:nrow(new_irf_var_data), length(unique(new_irf_var_data_long$cov)))
    new_irf_var_data_long$e_type <- el
    irf_data_org <- rbind(irf_data_org, new_irf_var_data_long)
  }
}
irf_data_org$e_type <- recode(irf_data_org$e_type, `irf`="pe", `Lower`="lwr", `Upper`="upr")
colnames(irf_data_org)[colnames(irf_data_org)=="value"] <- "org"
irf_data_org <- irf_data_org %>% filter(cov != out)
irf_data_org_wide <- irf_data_org %>% mutate(org = (org/10)*100) %>% spread(e_type, org)
irf_data_org_wide$cov <- recode(irf_data_org_wide$cov, "follower"="Followers", "organization"="Leaders")
irf_data_org_wide$cov <- factor(irf_data_org_wide$cov, levels = c("Followers","Leaders"))
irf_data_org_wide$out <- recode(irf_data_org_wide$out, "follower"="Followers", "organization"="Leaders")
irf_data_org_wide$out <- factor(irf_data_org_wide$out, levels = c("Followers","Leaders"))

# ---- individual ----
var_irfs <- var_irfs_cum_merged_indiv
variables <- names(var_irfs$irf)
irf_data_indiv <- NULL
for (el in elements_to_pull) {
  new_irf_info <- var_irfs[el][[1]]
  for (out in variables) {
    new_irf_var_data <- as.data.frame(new_irf_info[out][[1]])
    new_irf_var_data_long <- new_irf_var_data %>% gather(cov, value)
    new_irf_var_data_long$out  <- out
    new_irf_var_data_long$day  <- rep(1:nrow(new_irf_var_data), length(unique(new_irf_var_data_long$cov)))
    new_irf_var_data_long$e_type <- el
    irf_data_indiv <- rbind(irf_data_indiv, new_irf_var_data_long)
  }
}
irf_data_indiv$e_type <- recode(irf_data_indiv$e_type, `irf`="pe", `Lower`="lwr", `Upper`="upr")
colnames(irf_data_indiv)[colnames(irf_data_indiv)=="value"] <- "indiv"
irf_data_indiv <- irf_data_indiv %>% filter(cov != out)
irf_data_indiv_wide <- irf_data_indiv %>% mutate(indiv = (indiv/10)*100) %>% spread(e_type, indiv)
irf_data_indiv_wide$cov <- recode(irf_data_indiv_wide$cov, "follower"="Followers", "individual"="Leaders")
irf_data_indiv_wide$cov <- factor(irf_data_indiv_wide$cov, levels = c("Followers","Leaders"))
irf_data_indiv_wide$out <- recode(irf_data_indiv_wide$out, "follower"="Followers", "individual"="Leaders")
irf_data_indiv_wide$out <- factor(irf_data_indiv_wide$out, levels = c("Followers","Leaders"))

# ========================== PRODUCE FIGURE =========================
pdf("../plots/FigureA8.pdf", width = 12, height = 4)

plot_db_pooled <- irf_data_wide %>% filter(day == 15)
plot_db_org    <- irf_data_org_wide %>% filter(day == 15)
plot_db_indiv  <- irf_data_indiv_wide %>% filter(day == 15)

ggplot(plot_db_pooled, aes(x = cov, y = pe, ymin = -0.1, ymax = 0.35)) +
  geom_segment(aes(x = plot_db_org$cov, xend = plot_db_org$cov, y = plot_db_org$lwr, yend = plot_db_org$upr, color = "Organizational Leaders"), size = 1) +
  geom_point(data = plot_db_org, aes(x = cov, y = pe, color = "Organizational Leaders"), size = 3, shape = 16) +
  geom_segment(aes(x = as.numeric(cov)+0.1, xend = as.numeric(cov)+0.1, y = lwr, yend = upr, color = "All Leaders"), size = 1) +
  geom_point(data = plot_db_pooled, aes(x = as.numeric(cov)+0.1, y = pe, color = "All Leaders"), size = 3, shape = 16) +
  geom_segment(aes(x = as.numeric(plot_db_indiv$cov)-0.1, xend = as.numeric(plot_db_indiv$cov)-0.1, y = plot_db_indiv$lwr, yend = plot_db_indiv$upr, color = "Individual Leaders"), size = 1) +
  geom_point(data = plot_db_indiv, aes(x = as.numeric(cov)-0.1, y = pe, color = "Individual Leaders"), size = 3, shape = 16) +
  geom_hline(yintercept = 0, color = "red4") +
  facet_wrap(~ out, nrow = 1) +
  coord_flip() +
  xlab("") +
  scale_y_continuous("\n15-day responses (in percentage points)", expand = c(0,0)) +
  scale_color_manual("", values = c("All Leaders" = "black","Organizational Leaders" = "green4","Individual Leaders" = "blue4")) +
  theme(
    panel.spacing = unit(1.05, "lines"),
    legend.position = "bottom",
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "gray90", linetype = "solid"),
    axis.text = element_text(size = 18),
    axis.text.y = element_text(hjust = 0),
    strip.text = element_text(size = 20),
    panel.border = element_rect(colour = "black", fill = FALSE),
    strip.background = element_rect(colour = "black"),
    axis.title = element_text(size = 16),
    legend.text = element_text(size = 16)
  )
dev.off()
